# -*- coding:utf-8 -*-
import io
import re

# Scrape table rows out of a locally saved GB2312-encoded HTML page and print
# every captured field on its own line.

# io.open decodes while reading, so the text is converted exactly once, and the
# `with` block closes the handle even if reading or decoding raises.
with io.open('d:/tmp/1.html', encoding='gb2312') as f:
    data = f.read()

# Raw strings keep the regex escapes (\s, \S) away from Python's own
# string-escape processing; the pattern text itself is unchanged.
# Each match starts at a <tr> and captures four groups:
#   1. the text following the first `href="` up to a closing quote
#   2. the text between `" target="_blank">` and `</a></div>`
#   3. the text between a later `">` and `</a></div>`
#   4. the text between a later `</span>` and `</div></td>`
patt = (
    r'<tr>[\s\S]+?href="([\s\S]+?)"[\s\S]+?" target="_blank">([^<]+?)</a></div>'
    r'[\s\S]+?">([^<]+?)</a></div>[\s\S]+?</span>([\s\S]+?)</div></td>'
)

# findall returns one tuple of captured groups per match.
for tr in re.findall(patt, data):
    for td in tr:
        # A parenthesised single-argument print is valid both as the Python 2
        # statement and as the Python 3 function, and prints the same text.
        print(td)










